{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Final Project"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Import libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sklearn as sk\n",
    "import os\n",
    "import sys\n",
    "from sklearn.preprocessing import LabelBinarizer\n",
    "from six.moves import cPickle as pickle\n",
    "from six.moves import range\n",
    "from timeit import default_timer as timer\n",
    "\n",
    "# Below code inspired by feature extraction technique detailed by Hatham Fayek here:\n",
    "#    http://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "    #only needed if tarfile is not already extracted.\n",
    "    import tarfile\n",
    "    t = tarfile.open('UrbanSound8K.tar.gz', mode=\"r:gz\")\n",
    "    t.extractall()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>slice_file_name</th>\n",
       "      <th>fsID</th>\n",
       "      <th>start</th>\n",
       "      <th>end</th>\n",
       "      <th>salience</th>\n",
       "      <th>fold</th>\n",
       "      <th>classID</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100032-3-0-0.wav</td>\n",
       "      <td>100032</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.317551</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>dog_bark</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100263-2-0-117.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>58.5</td>\n",
       "      <td>62.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100263-2-0-121.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>60.5</td>\n",
       "      <td>64.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100263-2-0-126.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>63.0</td>\n",
       "      <td>67.000000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100263-2-0-137.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>68.5</td>\n",
       "      <td>72.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      slice_file_name    fsID  start        end  salience  fold  classID  \\\n",
       "0    100032-3-0-0.wav  100032    0.0   0.317551         1     5        3   \n",
       "1  100263-2-0-117.wav  100263   58.5  62.500000         1     5        2   \n",
       "2  100263-2-0-121.wav  100263   60.5  64.500000         1     5        2   \n",
       "3  100263-2-0-126.wav  100263   63.0  67.000000         1     5        2   \n",
       "4  100263-2-0-137.wav  100263   68.5  72.500000         1     5        2   \n",
       "\n",
       "              class  \n",
       "0          dog_bark  \n",
       "1  children_playing  \n",
       "2  children_playing  \n",
       "3  children_playing  \n",
       "4  children_playing  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_sound = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')\n",
    "raw_sound.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.          0.          0.         ..., -0.00784302 -0.00761414\n",
      " -0.00749207] 44100\n"
     ]
    }
   ],
   "source": [
    "import soundfile as sf\n",
    "data, samplerate = sf.read('UrbanSound8K/audio_const_rate/fold1/7383-3-0-0.wav')\n",
    "print(data, samplerate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['fold1', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7', 'fold8', 'fold9', 'fold10']\n"
     ]
    }
   ],
   "source": [
    "fold_list = []\n",
    "for i in range(1,11):\n",
    "    fold_name = 'fold' + str(i)\n",
    "    fold_list.append(fold_name)\n",
    "print(fold_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from python_speech_features import mfcc\n",
    "from python_speech_features import logfbank\n",
    "import scipy.io.wavfile as wave\n",
    "from scipy.fftpack import dct "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Exceptions:  0\n"
     ]
    }
   ],
   "source": [
    "from os import listdir, system\n",
    "from os.path import isfile, join\n",
    "mfcc_data = []\n",
    "exception_count = 0\n",
    "for i in range(10):\n",
    "    mypath = 'UrbanSound8K/audio/'+ fold_list[i] + '/'\n",
    "    files = [f for f in listdir(mypath) if isfile(join(mypath, f))]\n",
    "    for f in files:\n",
    "        try:\n",
    "            fn = mypath + f\n",
    "            \n",
    "            sig, rate = sf.read(fn)\n",
    "            mfcc_feat = mfcc(sig,rate, winlen=0.0232,winstep=0.0116,nfilt=40,numcep=25)\n",
    "            sample = logfbank(sig,rate,winlen=0.0232,winstep=0.0116,nfilt=40)\n",
    "        \n",
    "\n",
    "\n",
    "        except:\n",
    "            print(f)\n",
    "            exception_count += 1\n",
    "            continue\n",
    "\n",
    "        l_row = raw_sound.loc[raw_sound['slice_file_name']==f].values.tolist()\n",
    "        label = l_row[0][-1]\n",
    "        #print(label)\n",
    "        fold = i+1\n",
    "        mfcc_data.append([sample, rate, label, fold])  \n",
    "print(\"Exceptions: \", exception_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8732"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(mfcc_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cols = ['sample', 'rate', 'label', 'fold']\n",
    "mfcc_pd = pd.DataFrame(data=mfcc_data, columns=cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8389830508474576\n"
     ]
    }
   ],
   "source": [
    "# check if same shape for each sample\n",
    "count = {}\n",
    "for x in mfcc_pd.index:\n",
    "    try:\n",
    "        count[mfcc_pd.loc[x,'sample'].shape[0]] += 1\n",
    "    except KeyError:\n",
    "        count[mfcc_pd.loc[x,'sample'].shape[0]] = 1\n",
    "\n",
    "        \n",
    "print(max(count.values())/mfcc_pd.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.ndarray"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# flatten and ravel samples\n",
    "row_const = max(count)\n",
    "sample_pd = pd.DataFrame()\n",
    "for x in mfcc_pd.index:\n",
    "    sample_pd = sample_pd.append(pd.Series(np.pad(mfcc_pd.loc[x,'sample'],\n",
    "                                      ((0, row_const-mfcc_pd.loc[x,'sample'].shape[0]),\n",
    "                                       (0,0)), 'constant').ravel()),\n",
    "                                         ignore_index=True)\n",
    "    \n",
    "# each sample in the \"sample\" column is now an 1d array. To model, expand features into multiple columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# add label and fold info to data.\n",
    "sample_pd['label'] = fcc_pd['label']\n",
    "sample_pd['fold'] = fcc_pd['fold']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#pickle.dump(sample_pd, open('2dzeropadded_data.p', 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "done\n"
     ]
    }
   ],
   "source": [
    "print('done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
